"""
按标题分割Markdown
"""
from langchain_text_splitters import MarkdownHeaderTextSplitter

# Header markers to split on, paired with the metadata key each one is
# reported under: "#" -> "Header 1", "##" -> "Header 2", "###" -> "Header 3".
headers_to_split_on = [("#" * level, f"Header {level}") for level in range(1, 4)]

# NOTE: this path is relative, so it is resolved against the current working
# directory of the process, not against the location of this script.
file_path = "../data/document/大数据应用发展趋势.md"
with open(file_path, encoding="utf-8") as source_file:
    texts = source_file.read()

# Echo the raw document, followed by an 80-character separator line.
print(texts, "-" * 80, sep="\n")

# Split the document at the configured header levels and report how many
# chunks were produced.
splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)
documents = splitter.split_text(texts)
print("分为多少块内容:", len(documents))
